read AST data - from NCBI

ast<-read_delim("Ecoli_AST.tsv.gz") %>% rename(BioSample=`#BioSample`)

length(unique(ast$BioSample))
## [1] 9094
ast %>% group_by(BioSample) %>% summarise(n=length(unique((Antibiotic))))%>% pull(n) %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   15.00   15.33   18.00   36.00

read ATB AMRfinderplus calls

afp <-read_tsv("ATB_Ecoli_AFP.tsv.gz")
gene_counts <- afp %>% group_by(`Gene symbol`, Class, Subclass) %>% count()

species_calls <- read_tsv("species_calls.tsv.gz")
ecoli <- species_calls %>% filter(Species=="Escherichia coli") %>% pull(Sample)

afp_status <- read_tsv("AMRFP_status.tsv.gz") %>% filter(sample %in% ecoli)
denominator <- afp_status %>% filter(status=="PASS") %>% nrow()

# afp results including null row for each genome that ran but returned no hits
afp_all <- afp_status %>% rename(Name=sample) %>% left_join(afp) %>% filter(status=="PASS")

read Enterobase typing info

ec_types <- read_tsv(file="ec_types_enterobase.tsv") %>% rename(Name=sample_accession)

get AMRfinderplus and AST data where both are available

sum(unique(ast$BioSample) %in% afp$Name)
## [1] 6768
# AST data for strains with AMRfinder run successfully
ast_matched <- ast %>% filter(BioSample %in% afp_status$sample[afp_status$status=="PASS"])

# AMRfinder results, for strains with matching AST data from NCBI
afp_matched <- afp %>% filter(Name %in% ast$BioSample)

# check objects
dim(ast_matched)
## [1] 107750     17
ast_matched %>% pull(BioSample) %>% unique() %>% length()
## [1] 6768
dim(afp_matched)
## [1] 184325      7
afp_matched %>% pull(Name) %>% unique() %>% length()
## [1] 6768

frequency of common core-gene mutations

# number with pmrB_Y358N
(gene_counts %>% filter(`Gene symbol`=="pmrB_Y358N") %>% pull(n))/denominator
## [1] 0.4652073
pmrB_Y358N <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="pmrB_Y358N")

pmrB_Y358N$clermont
## # A tibble: 16 × 4
##    `Clermont Type (ClermonTyping)`     freq     n denom
##    <chr>                              <dbl> <dbl> <int>
##  1 -                               0.333        2     6
##  2 A                               0.295    14713 49862
##  3 B1                              0.976    53783 55130
##  4 B2                              0.000386    14 36252
##  5 C                               0.990     5496  5552
##  6 D                               0.0301     458 15207
##  7 E                               0.898    19666 21893
##  8 E or cladeI                     0.341       56   164
##  9 F                               0.00887     37  4171
## 10 G                               0.100      331  3307
## 11 ND                              0.418      220   526
## 12 Non Escherichia                 0            0     6
## 13 Unknown                         0.0233     336 14446
## 14 albertii                        0.25         1     4
## 15 cladeI                          0            0   653
## 16 fergusonii                      1            1     1
pmrB_Y358N$pathovar
## # A tibble: 19 × 4
##    Pathovar                 freq     n  denom
##    <chr>                   <dbl> <dbl>  <int>
##  1 -                    0.270    32596 120659
##  2 E. albertii          0.25         1      4
##  3 E. coli - EHEC       0.958    35843  37415
##  4 E. coli - EIEC       0.480      240    500
##  5 E. coli - EIEC/EHEC  1            1      1
##  6 E. coli - EIEC/EPEC  0            0      1
##  7 E. coli - EPEC       0.527     3351   6362
##  8 E. coli - ERROR      1            2      2
##  9 E. coli - ETEC       0.440     1439   3269
## 10 E. coli - ETEC/EHEC  0.875      126    144
## 11 E. coli - ETEC/EPEC  0.935       43     46
## 12 E. coli - ETEC/STEC  0.326      238    730
## 13 E. coli - STEC       0.810     9681  11954
## 14 ND                   0.418      220    526
## 15 Shigella             1            2      2
## 16 Shigella boydii      0.559      457    818
## 17 Shigella dysenteriae 0.981      916    934
## 18 Shigella flexneri    0.999     9953   9964
## 19 Shigella sonnei      0.000361     5  13849
pmrB_Y358N$clermont_plot

pmrB_Y358N$pathovar_plot

# number with glpT_E448K 
(gene_counts %>% filter(`Gene symbol`=="glpT_E448K") %>% pull(n))/denominator
## [1] 0.8692433
glpT_E448K <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="glpT_E448K")

glpT_E448K$clermont
## # A tibble: 16 × 4
##    `Clermont Type (ClermonTyping)`  freq     n denom
##    <chr>                           <dbl> <dbl> <int>
##  1 -                               1         6     6
##  2 A                               0.572 28543 49862
##  3 B1                              0.997 54952 55130
##  4 B2                              0.948 34361 36252
##  5 C                               0.981  5448  5552
##  6 D                               0.996 15151 15207
##  7 E                               0.997 21826 21893
##  8 E or cladeI                     0.780   128   164
##  9 F                               0.997  4159  4171
## 10 G                               0.997  3297  3307
## 11 ND                              0.914   481   526
## 12 Non Escherichia                 1         6     6
## 13 Unknown                         0.987 14258 14446
## 14 albertii                        1         4     4
## 15 cladeI                          0.998   652   653
## 16 fergusonii                      1         1     1
glpT_E448K$pathovar
## # A tibble: 19 × 4
##    Pathovar              freq      n  denom
##    <chr>                <dbl>  <dbl>  <int>
##  1 -                    0.831 100296 120659
##  2 E. albertii          1          4      4
##  3 E. coli - EHEC       0.988  36980  37415
##  4 E. coli - EIEC       0.564    282    500
##  5 E. coli - EIEC/EHEC  1          1      1
##  6 E. coli - EIEC/EPEC  1          1      1
##  7 E. coli - EPEC       0.851   5411   6362
##  8 E. coli - ERROR      1          2      2
##  9 E. coli - ETEC       0.728   2381   3269
## 10 E. coli - ETEC/EHEC  0.868    125    144
## 11 E. coli - ETEC/EPEC  0.935     43     46
## 12 E. coli - ETEC/STEC  0.789    576    730
## 13 E. coli - STEC       0.939  11229  11954
## 14 ND                   0.914    481    526
## 15 Shigella             1          2      2
## 16 Shigella boydii      0.996    815    818
## 17 Shigella dysenteriae 1        934    934
## 18 Shigella flexneri    0.999   9959   9964
## 19 Shigella sonnei      0.993  13751  13849
glpT_E448K$clermont_plot

glpT_E448K$pathovar_plot

blaEC

# blaEC from full data, so we get accessions
blaEC <- read_tsv("bla_EC_hits.tsv")

blaEC_summary <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="blaEC")

blaEC_summary$clermont_plot

blaEC_summary$pathovar_plot

# boxplot of identity of hits, stratified by accession of closest ref seq
blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>% 
  mutate(ec_label=paste(`Gene symbol`, `Accession of closest sequence`)) %>% 
  ggplot(aes(x=ec_label, y=`% Identity to reference sequence`)) + geom_boxplot() + coord_flip()

# distribution of alleles per clade
blaEC_alleles_clade <- blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>% 
  ggplot(aes(x=`Clermont Type (ClermonTyping)`, fill=`Accession of closest sequence`)) + geom_bar() + coord_flip()

# distribution of alleles per pathovar
blaEC_alleles_pathovar <- blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>% 
  ggplot(aes(x=Pathovar, fill=`Accession of closest sequence`)) + geom_bar() + coord_flip()

blaEC_alleles_clade + blaEC_alleles_pathovar + plot_layout(guides="collect")

(gene_counts %>% filter(`Gene symbol`=="blaEC") %>% pull(n))/denominator
## [1] 0.9647534

solo markers for ceftriaxone

cef_cephalosporin <- solo_marker_atb(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftriaxone", refgene_subclass = "CEPHALOSPORIN")

cef_cephalosporin$solo_stats
## # A tibble: 60 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T        2 resistant     0 0      0         0        0    
##  2 ampC_C-42T       29 resistant     1 0.0345 0.0339    0        0.101
##  3 ampC_T-32A       49 resistant     3 0.0612 0.0342    0        0.128
##  4 blaCMY            7 resistant     4 0.571  0.187     0.205    0.938
##  5 blaCMY-145        1 resistant     1 1      0         1        1    
##  6 blaCMY-17         1 resistant     1 1      0         1        1    
##  7 blaCMY-2        120 resistant   118 0.983  0.0117    0.960    1    
##  8 blaCMY-4          4 resistant     3 0.75   0.217     0.326    1    
##  9 blaCMY-42         3 resistant     3 1      0         1        1    
## 10 blaCMY-6          1 resistant     1 1      0         1        1    
## # ℹ 50 more rows
cef_cephalosporin$combined_plot

model for ceftriaxone

cef_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftriaxone", refgene_subclass = "CEPHALOSPORIN")

cef_model <- logistf(resistant ~ ., data=cef_binary)

cef_logreg <- logreg_details(cef_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

cef_logreg

solo markers for fosfomycin

fos_fosfomycin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="fosfomycin", refgene_subclass="FOSFOMYCIN", afp_matched=afp_matched)

fos_fosfomycin$solo_stats
## # A tibble: 6 × 8
##   `Gene symbol` total category           n      p     se ci.lower ci.upper
##   <chr>         <int> <chr>          <int>  <dbl>  <dbl>    <dbl>    <dbl>
## 1 glpT_E448K       35 resistant          0 0      0             0   0     
## 2 uhpT_E350Q        2 resistant          0 0      0             0   0     
## 3 <NA>             35 resistant          1 0.0286 0.0282        0   0.0838
## 4 glpT_E448K       35 nonsusceptible     0 0      0             0   0     
## 5 uhpT_E350Q        2 nonsusceptible     0 0      0             0   0     
## 6 <NA>             35 nonsusceptible     1 0.0286 0.0282        0   0.0838
fos_fosfomycin$combined_plot

model for fosfomycin

fos_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "fosfomycin", refgene_subclass = "FOSFOMYCIN")

fos_model <- logistf(resistant ~ ., data=fos_binary)

fos_logreg <- logreg_details(fos_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

fos_logreg

solo markers for colistin

colistin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="colistin", refgene_subclass="COLISTIN", afp_matched=afp_matched)

colistin$solo_stats
## # A tibble: 10 × 8
##    `Gene symbol` total category           n       p       se ci.lower ci.upper
##    <chr>         <int> <chr>          <int>   <dbl>    <dbl>    <dbl>    <dbl>
##  1 mcr-1.1           1 resistant          0   0       0        0         0    
##  2 mcr-2.1           0 resistant          0 NaN     NaN      NaN       NaN    
##  3 pmrB_E123D       51 resistant          0   0       0        0         0    
##  4 pmrB_Y358N        6 resistant          0   0       0        0         0    
##  5 <NA>             48 resistant          7   0.146   0.0509   0.0460    0.246
##  6 mcr-1.1           1 nonsusceptible     1   1       0        1         1    
##  7 mcr-2.1           0 nonsusceptible     0 NaN     NaN      NaN       NaN    
##  8 pmrB_E123D       51 nonsusceptible    49   0.961   0.0272   0.908     1    
##  9 pmrB_Y358N        6 nonsusceptible     4   0.667   0.192    0.289     1    
## 10 <NA>             48 nonsusceptible    46   0.958   0.0288   0.902     1
colistin$combined_plot

model for colistin

col_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "colistin", refgene_subclass = "COLISTIN")

col_model <- logistf(resistant ~ ., data=col_binary)

col_logreg <- logreg_details(col_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

col_logreg

solo markers for gentamicin vs aminoglycosides

gentamicin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="gentamicin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)

gentamicin_agly$solo_stats
## # A tibble: 34 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa       1 resistant     0 0      0         0       0     
##  2 aac(3)-IId       39 resistant    39 1      0         1       1     
##  3 aac(3)-IIe       35 resistant    34 0.971  0.0282    0.916   1     
##  4 aac(3)-VIa        2 resistant     0 0      0         0       0     
##  5 aac(6')-Ib        1 resistant     0 0      0         0       0     
##  6 aac(6')-Ib4       6 resistant     0 0      0         0       0     
##  7 aadA1           131 resistant     2 0.0153 0.0107    0       0.0363
##  8 aadA16            1 resistant     0 0      0         0       0     
##  9 aadA2            32 resistant     0 0      0         0       0     
## 10 aadA22            4 resistant     0 0      0         0       0     
## # ℹ 24 more rows
gentamicin_agly$combined_plot

model for gentamicin

gent_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "gentamicin", refgene_class = "AMINOGLYCOSIDE")

gent_model_glm <- glm(resistant ~., family=binomial(link="logit"), data=gent_binary)

gent_logreg_glm <- summary(gent_model_glm)$coef %>% 
    as_tibble(rownames="marker") %>% 
    rename(est=Estimate, pval=`Pr(>|z|)`) %>% 
    select(marker, est, pval) %>%
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient", col="p<0.05")

gent_logreg_glm

gent_model <- logistf(resistant ~ ., data=gent_binary[,colSums(gent_binary)>=20])

gent_logistf <- logreg_details(gent_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

gent_logistf

solo markers for amikacin vs aminoglycosides

amikacin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="amikacin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)

amikacin_agly$solo_stats
## # A tibble: 30 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa       1 resistant     0 0      0             0   0     
##  2 aac(3)-IId       25 resistant     0 0      0             0   0     
##  3 aac(3)-IIe       35 resistant     1 0.0286 0.0282        0   0.0838
##  4 aac(6')-Ib        1 resistant     1 1      0             1   1     
##  5 aac(6')-Ib4       5 resistant     0 0      0             0   0     
##  6 aadA1            75 resistant     1 0.0133 0.0132        0   0.0393
##  7 aadA16            1 resistant     0 0      0             0   0     
##  8 aadA2            21 resistant     0 0      0             0   0     
##  9 aadA22            3 resistant     0 0      0             0   0     
## 10 aadA5            74 resistant     3 0.0405 0.0229        0   0.0855
## # ℹ 20 more rows
amikacin_agly$combined_plot

model for amikacin

amk_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "amikacin", refgene_class = "AMINOGLYCOSIDE")

amk_model <- logistf(resistant ~ ., data=amk_binary)

amk_logreg <- logreg_details(amk_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

amk_logreg

solo markers for tobramycin vs aminoglycosides

tobramycin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tobramycin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)

tobramycin_agly$solo_stats
## # A tibble: 30 × 8
##    `Gene symbol` total category      n     p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl>  <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa       1 resistant     0 0     0         0        0    
##  2 aac(3)-IId       25 resistant     9 0.36  0.096     0.172    0.548
##  3 aac(3)-IIe       27 resistant    21 0.778 0.0800    0.621    0.935
##  4 aac(6')-Ib        1 resistant     1 1     0         1        1    
##  5 aac(6')-Ib4       5 resistant     2 0.4   0.219     0        0.829
##  6 aadA1            75 resistant     0 0     0         0        0    
##  7 aadA16            1 resistant     0 0     0         0        0    
##  8 aadA2            19 resistant     0 0     0         0        0    
##  9 aadA22            3 resistant     0 0     0         0        0    
## 10 aadA5            68 resistant    26 0.382 0.0589    0.267    0.498
## # ℹ 20 more rows
tobramycin_agly$combined_plot

model for tobramycin

tob_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "tobramycin", refgene_class = "AMINOGLYCOSIDE")

tob_model <- logistf(resistant ~ ., data=tob_binary)

tob_logreg <- logreg_details(tob_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

tob_logreg

solo markers for ampicillin

ampicillin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ampicillin", refgene_class="BETA-LACTAM", afp_matched=afp_matched)

ampicillin$solo_stats
## # A tibble: 6 × 8
##   `Gene symbol` total category           n      p      se ci.lower ci.upper
##   <chr>         <int> <chr>          <int>  <dbl>   <dbl>    <dbl>    <dbl>
## 1 blaEC          3083 resistant         63 0.0204 0.00255   0.0154   0.0254
## 2 blaEC-5         246 resistant          9 0.0366 0.0120    0.0131   0.0600
## 3 <NA>           1923 resistant       1872 0.973  0.00366   0.966    0.981 
## 4 blaEC          3083 nonsusceptible    68 0.0221 0.00265   0.0169   0.0272
## 5 blaEC-5         246 nonsusceptible     9 0.0366 0.0120    0.0131   0.0600
## 6 <NA>           1923 nonsusceptible  1877 0.976  0.00348   0.969    0.983
ampicillin$combined_plot

model for ampicillin

amp_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ampicillin", refgene_class = "BETA-LACTAM")

#amp_model <- logistf(resistant ~ ., data=amp_binary[,colSums(amp_binary)>=20])

amp_model_glm <- glm(resistant ~., family=binomial(link="logit"), data=amp_binary)

amp_logreg_glm <- summary(amp_model_glm)$coef %>% 
    as_tibble(rownames="marker") %>% 
    rename(est=Estimate, pval=`Pr(>|z|)`) %>% 
    select(marker, est, pval) %>%
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient", col="p<0.05")

amp_logreg_glm

solo markers for azithromycin

azi <- solo_marker_atb(ast_matched = ast_matched, antibiotic="azithromycin", refgene_subclass="AZITHROMYCIN", afp_matched=afp_matched)

azi$solo_stats
## # A tibble: 6 × 8
##   `Gene symbol` total category           n       p       se  ci.lower ci.upper
##   <chr>         <int> <chr>          <int>   <dbl>    <dbl>     <dbl>    <dbl>
## 1 mef(B)            3 resistant          0 0       0        0          0      
## 2 mph(A)            8 resistant          7 0.875   0.117    0.646      1      
## 3 <NA>           2770 resistant          4 0.00144 0.000722 0.0000299  0.00286
## 4 mef(B)            3 nonsusceptible     0 0       0        0          0      
## 5 mph(A)            8 nonsusceptible     7 0.875   0.117    0.646      1      
## 6 <NA>           2770 nonsusceptible     4 0.00144 0.000722 0.0000299  0.00286
azi$combined_plot

model for azithromycin

azi_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "azithromycin", refgene_subclass = "AZITHROMYCIN", maf=1)

azi_model <- logistf(resistant ~ ., data=azi_binary)

azi_logreg <- logreg_details(azi_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

azi_logreg

solo markers for imipenem

imipenem <- solo_marker_atb(ast_matched = ast_matched, antibiotic="imipenem", refgene_subclass="CARBAPENEM", afp_matched=afp_matched)

imipenem$solo_stats
## # A tibble: 26 × 8
##    `Gene symbol` total category      n     p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl>  <dbl>    <dbl>    <dbl>
##  1 blaKPC-2          4 resistant     4 1     0         1        1    
##  2 blaKPC-3         12 resistant     8 0.667 0.136     0.400    0.933
##  3 blaKPC-4          2 resistant     1 0.5   0.354     0        1    
##  4 blaNDM-1         10 resistant     9 0.9   0.0949    0.714    1    
##  5 blaNDM-5          4 resistant     4 1     0         1        1    
##  6 blaNDM-6          1 resistant     1 1     0         1        1    
##  7 blaNDM-7          4 resistant     4 1     0         1        1    
##  8 blaOXA-181        1 resistant     0 0     0         0        0    
##  9 blaOXA-48         1 resistant     1 1     0         1        1    
## 10 ompC_Q171STOP     2 resistant     0 0     0         0        0    
## # ℹ 16 more rows
imipenem$combined_plot

model for imipenem

imp_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "imipenem", refgene_subclass = "CARBAPENEM", maf=5)

imp_model <- logistf(resistant ~ ., data=imp_binary)

imp_logreg <- logreg_details(imp_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

imp_logreg

solo markers for meropenem

meropenem <- solo_marker_atb(ast_matched = ast_matched, antibiotic="meropenem", refgene_subclass="CARBAPENEM", afp_matched=afp_matched)

meropenem$solo_stats
## # A tibble: 26 × 8
##    `Gene symbol` total category      n     p    se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl> <dbl>    <dbl>    <dbl>
##  1 blaKPC-2          4 resistant     3  0.75 0.217    0.326    1    
##  2 blaKPC-3         12 resistant     6  0.5  0.144    0.217    0.783
##  3 blaKPC-4          2 resistant     0  0    0        0        0    
##  4 blaNDM-1         11 resistant    11  1    0        1        1    
##  5 blaNDM-5          8 resistant     8  1    0        1        1    
##  6 blaNDM-6          1 resistant     1  1    0        1        1    
##  7 blaNDM-7          4 resistant     4  1    0        1        1    
##  8 blaOXA-181        1 resistant     0  0    0        0        0    
##  9 blaOXA-48         1 resistant     1  1    0        1        1    
## 10 ompC_Q171STOP     2 resistant     1  0.5  0.354    0        1    
## # ℹ 16 more rows
meropenem$combined_plot

model for meropenem

mer_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "meropenem", refgene_subclass = "CARBAPENEM", maf=5)

mer_model <- logistf(resistant ~ ., data=mer_binary)

mer_logreg <- logreg_details(mer_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

mer_logreg

solo markers for ceftazidime

ceftazidime_cephalosporin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ceftazidime", refgene_subclass="CEPHALOSPORIN", afp_matched=afp_matched)

ceftazidime_cephalosporin$solo_stats
## # A tibble: 66 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T        5 resistant     0 0      0         0       0     
##  2 ampC_C-42T       29 resistant     2 0.0690 0.0471    0       0.161 
##  3 ampC_G-15GG       2 resistant     0 0      0         0       0     
##  4 ampC_T-14TGT      2 resistant     0 0      0         0       0     
##  5 ampC_T-32A       58 resistant     1 0.0172 0.0171    0       0.0507
##  6 blaCMY            2 resistant     0 0      0         0       0     
##  7 blaCMY-145        1 resistant     1 1      0         1       1     
##  8 blaCMY-2        117 resistant   104 0.889  0.0291    0.832   0.946 
##  9 blaCMY-4          4 resistant     3 0.75   0.217     0.326   1     
## 10 blaCMY-42         5 resistant     5 1      0         1       1     
## # ℹ 56 more rows
ceftazidime_cephalosporin$combined_plot

model for ceftazidime

caz_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftazidime", refgene_subclass = "CEPHALOSPORIN")

caz_model <- logistf(resistant ~ ., data=caz_binary)

caz_logreg <- logreg_details(caz_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

caz_logreg

solo markers for chloramphenicol

chloramphenicol <- solo_marker_atb(ast_matched = ast_matched, antibiotic="chloramphenicol", refgene_class="PHENICOL", afp_matched=afp_matched)

chloramphenicol$solo_stats
## # A tibble: 14 × 8
##    `Gene symbol` total category        n         p        se  ci.lower  ci.upper
##    <chr>         <int> <chr>       <int>     <dbl>     <dbl>     <dbl>     <dbl>
##  1 catA1            17 resistant      17   1         0         1         1      
##  2 catA2             1 resistant       1   1         0         1         1      
##  3 catB3             0 resistant       0 NaN       NaN       NaN       NaN      
##  4 cmlA1            21 resistant      17   0.810     0.0857    0.642     0.977  
##  5 cmlA5             0 resistant       0 NaN       NaN       NaN       NaN      
##  6 floR             84 resistant      83   0.988     0.0118    0.965     1      
##  7 <NA>           2670 resistant      13   0.00487   0.00135   0.00223   0.00751
##  8 catA1            17 nonsuscept…    17   1         0         1         1      
##  9 catA2             1 nonsuscept…     1   1         0         1         1      
## 10 catB3             0 nonsuscept…     0 NaN       NaN       NaN       NaN      
## 11 cmlA1            21 nonsuscept…    18   0.857     0.0764    0.707     1      
## 12 cmlA5             0 nonsuscept…     0 NaN       NaN       NaN       NaN      
## 13 floR             84 nonsuscept…    83   0.988     0.0118    0.965     1      
## 14 <NA>           2670 nonsuscept…    57   0.0213    0.00280   0.0159    0.0268
chloramphenicol$combined_plot

model for chloramphenicol

chloramphenicol_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "chloramphenicol", refgene_class = "PHENICOL")

chloramphenicol_model <- logistf(resistant ~ ., data=chloramphenicol_binary)

chloramphenicol_logreg <- logreg_details(chloramphenicol_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

chloramphenicol_logreg

solo markers for trimethoprim

tmx <- solo_marker_atb(ast_matched = ast_matched, antibiotic="trimethoprim-sulfamethoxazole", refgene_class=c("TRIMETHOPRIM", "SULFONAMIDE"), afp_matched=afp_matched)

tmx$solo_stats
## # A tibble: 26 × 8
##    `Gene symbol` total category      n     p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl>  <dbl>    <dbl>    <dbl>
##  1 dfrA1            32 resistant     9 0.281 0.0795   0.125     0.437
##  2 dfrA12            5 resistant     0 0     0        0         0    
##  3 dfrA14            8 resistant     3 0.375 0.171    0.0395    0.710
##  4 dfrA16            3 resistant     0 0     0        0         0    
##  5 dfrA17           18 resistant    11 0.611 0.115    0.386     0.836
##  6 dfrA5             8 resistant     0 0     0        0         0    
##  7 dfrA7            14 resistant     2 0.143 0.0935   0         0.326
##  8 dfrA8             3 resistant     2 0.667 0.272    0.133     1    
##  9 folP_P64S         1 resistant     0 0     0        0         0    
## 10 sul1            220 resistant     0 0     0        0         0    
## # ℹ 16 more rows
tmx$combined_plot

model for trimethoprim

tmx_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "trimethoprim-sulfamethoxazole", refgene_class = c("TRIMETHOPRIM", "SULFONAMIDE"))

tmx_model <- logistf(resistant ~ ., data=tmx_binary)

tmx_logreg <- logreg_details(tmx_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

tmx_logreg

# solo markers for tetracycline

tet <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tetracycline", refgene_class="TETRACYCLINE", afp_matched=afp_matched)

tet$solo_stats
## # A tibble: 16 × 8
##    `Gene symbol` total category           n      p      se ci.lower ci.upper
##    <chr>         <int> <chr>          <int>  <dbl>   <dbl>    <dbl>    <dbl>
##  1 acrB_R620C        3 resistant          0 0      0         0        0     
##  2 tet(A)          943 resistant        938 0.995  0.00236   0.990    0.999 
##  3 tet(B)          826 resistant        818 0.990  0.00341   0.984    0.997 
##  4 tet(C)           47 resistant         15 0.319  0.0680    0.186    0.452 
##  5 tet(D)           28 resistant         28 1      0         1        1     
##  6 tet(H)            2 resistant          2 1      0         1        1     
##  7 tet(M)            6 resistant          0 0      0         0        0     
##  8 <NA>           3318 resistant        177 0.0533 0.00390   0.0457   0.0610
##  9 acrB_R620C        3 nonsusceptible     0 0      0         0        0     
## 10 tet(A)          943 nonsusceptible   938 0.995  0.00236   0.990    0.999 
## 11 tet(B)          826 nonsusceptible   818 0.990  0.00341   0.984    0.997 
## 12 tet(C)           47 nonsusceptible    43 0.915  0.0407    0.835    0.995 
## 13 tet(D)           28 nonsusceptible    28 1      0         1        1     
## 14 tet(H)            2 nonsusceptible     2 1      0         1        1     
## 15 tet(M)            6 nonsusceptible     0 0      0         0        0     
## 16 <NA>           3318 nonsusceptible   193 0.0582 0.00406   0.0502   0.0661
tet$combined_plot

model for tetracycline

tet_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "tetracycline", refgene_class = "TETRACYCLINE")

tet_model <- logistf(resistant ~ ., data=tet_binary)

tet_logreg <- logreg_details(tet_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

tet_logreg

solo markers for tigecycline

tig <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tigecycline", refgene_subclass="TIGECYCLINE", afp_matched=afp_matched)

tig$solo_stats
## # A tibble: 2 × 8
##   `Gene symbol` total category           n      p      se ci.lower ci.upper
##   <chr>         <int> <chr>          <int>  <dbl>   <dbl>    <dbl>    <dbl>
## 1 <NA>            153 resistant          2 0.0131 0.00918        0   0.0311
## 2 <NA>            153 nonsusceptible     2 0.0131 0.00918        0   0.0311

solo markers for ciprofloxacin

cip <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ciprofloxacin", refgene_subclass="QUINOLONE", afp_matched=afp_matched)

cip$solo_stats
## # A tibble: 46 × 8
##    `Gene symbol`  total category      n       p      se ci.lower ci.upper
##    <chr>          <int> <chr>     <int>   <dbl>   <dbl>    <dbl>    <dbl>
##  1 aac(6')-Ib-cr5     1 resistant     0 0       0       0          0     
##  2 gyrA_D87G          4 resistant     0 0       0       0          0     
##  3 gyrA_D87N          5 resistant     0 0       0       0          0     
##  4 gyrA_D87Y          3 resistant     1 0.333   0.272   0          0.867 
##  5 gyrA_S83A         12 resistant     0 0       0       0          0     
##  6 gyrA_S83L        111 resistant    17 0.153   0.0342  0.0862     0.220 
##  7 marR_S3N         524 resistant     4 0.00763 0.00380 0.000181   0.0151
##  8 parC_A56T          7 resistant     0 0       0       0          0     
##  9 parC_S57T         40 resistant     0 0       0       0          0     
## 10 parE_D475E        91 resistant     0 0       0       0          0     
## # ℹ 36 more rows
cip$combined_plot

model for ciprofloxacin

cip_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ciprofloxacin", refgene_class = "QUINOLONE")

cip_model <- logistf(resistant ~ ., data=cip_binary)

cip_logreg <- logreg_details(cip_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

cip_logreg